import pandas as pd
df_monthly = pd.read_excel('Table_4.3_Natural_Gas_Consumption_by_Sector.xlsx')
df_monthly.head()
| Month | NG-Residentional-sector | NG-Commercial-sector | NG-Industrial-sector | NG-Industrial-sector_02 | NG-Industrial-sector_01 | NG-Industrial-sector_total | Natural Gas Consumed by the Industrial Sector, Total | NG-Transportation-sector | NG-Transportation-sector_01 | NG-Transportation-sector_total | NG-Electric-Power-sector | NG-consumption-total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1973-01-01 | 843.900 | 392.315 | 0.0 | Not Available | 0.0 | 0.0 | 810.141 | 77.544 | Not Available | 77.544 | 224.100 | 2348.0 |
| 1 | 1973-02-01 | 747.331 | 394.281 | 0.0 | Not Available | 0.0 | 0.0 | 698.671 | 70.212 | Not Available | 70.212 | 215.505 | 2126.0 |
| 2 | 1973-03-01 | 648.504 | 310.799 | 0.0 | Not Available | 0.0 | 0.0 | 731.583 | 66.546 | Not Available | 66.546 | 257.568 | 2015.0 |
| 3 | 1973-04-01 | 465.867 | 231.943 | 0.0 | Not Available | 0.0 | 0.0 | 807.360 | 60.602 | Not Available | 60.602 | 269.228 | 1835.0 |
| 4 | 1973-05-01 | 326.313 | 174.258 | 0.0 | Not Available | 0.0 | 0.0 | 852.091 | 57.101 | Not Available | 57.101 | 319.237 | 1729.0 |
df_monthly.describe()
| Month | NG-Residentional-sector | NG-Commercial-sector | NG-Industrial-sector | NG-Industrial-sector_01 | NG-Industrial-sector_total | Natural Gas Consumed by the Industrial Sector, Total | NG-Transportation-sector | NG-Transportation-sector_total | NG-Electric-Power-sector | NG-consumption-total | |
|---|---|---|---|---|---|---|---|---|---|---|---|
| count | 602 | 602.000000 | 602.000000 | 602.000000 | 602.000000 | 602.000000 | 602.000000 | 602.000000 | 602.000000 | 602.000000 | 602.000000 |
| mean | 1998-01-15 09:36:28.704318976 | 396.200855 | 244.061022 | 91.198199 | 453.591919 | 521.442824 | 715.951678 | 56.165193 | 57.505400 | 465.441158 | 1879.160161 |
| min | 1973-01-01 00:00:00 | 99.781000 | 88.740000 | 0.000000 | 0.000000 | 0.000000 | 376.284000 | 33.378000 | 33.378000 | 137.449000 | 939.930000 |
| 25% | 1985-07-08 18:00:00 | 141.706500 | 135.193500 | 86.991250 | 450.264750 | 507.036750 | 647.969750 | 44.107000 | 44.921500 | 260.210000 | 1523.534000 |
| 50% | 1998-01-16 12:00:00 | 314.465500 | 202.248000 | 95.589000 | 510.493500 | 592.421000 | 717.020000 | 50.929000 | 52.280000 | 369.297000 | 1809.537000 |
| 75% | 2010-07-24 06:00:00 | 645.060500 | 353.924000 | 108.682250 | 565.752000 | 659.294250 | 786.870500 | 65.579000 | 66.447000 | 636.384750 | 2198.633500 |
| max | 2023-02-01 00:00:00 | 1037.197000 | 571.744000 | 173.404000 | 769.000000 | 816.696000 | 1003.192000 | 132.302000 | 136.817000 | 1400.013000 | 3591.691000 |
| std | NaN | 267.547294 | 124.109428 | 42.335883 | 193.356394 | 224.952325 | 105.157712 | 16.741735 | 17.640045 | 260.427382 | 494.798258 |
list(df_monthly)
['Month', 'NG-Residentional-sector', 'NG-Commercial-sector', 'NG-Industrial-sector', 'NG-Industrial-sector_02', 'NG-Industrial-sector_01', 'NG-Industrial-sector_total', 'Natural Gas Consumed by the Industrial Sector, Total', 'NG-Transportation-sector', 'NG-Transportation-sector_01', 'NG-Transportation-sector_total', 'NG-Electric-Power-sector', 'NG-consumption-total']
df_monthly.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 602 entries, 0 to 601 Data columns (total 13 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Month 602 non-null datetime64[ns] 1 NG-Residentional-sector 602 non-null float64 2 NG-Commercial-sector 602 non-null float64 3 NG-Industrial-sector 602 non-null float64 4 NG-Industrial-sector_02 602 non-null object 5 NG-Industrial-sector_01 602 non-null float64 6 NG-Industrial-sector_total 602 non-null float64 7 Natural Gas Consumed by the Industrial Sector, Total 602 non-null float64 8 NG-Transportation-sector 602 non-null float64 9 NG-Transportation-sector_01 602 non-null object 10 NG-Transportation-sector_total 602 non-null float64 11 NG-Electric-Power-sector 602 non-null float64 12 NG-consumption-total 602 non-null float64 dtypes: datetime64[ns](1), float64(10), object(2) memory usage: 61.3+ KB
df_monthly.isna().sum()
Month 0 NG-Residentional-sector 0 NG-Commercial-sector 0 NG-Industrial-sector 0 NG-Industrial-sector_02 0 NG-Industrial-sector_01 0 NG-Industrial-sector_total 0 Natural Gas Consumed by the Industrial Sector, Total 0 NG-Transportation-sector 0 NG-Transportation-sector_01 0 NG-Transportation-sector_total 0 NG-Electric-Power-sector 0 NG-consumption-total 0 dtype: int64
import plotly.express as px
def plot_time_series(df, title, save_file):
fig = px.line(df, x='Month')
for column in df.columns[1:]:
fig.add_scatter(x=df['Month'], y=df[column], name=column, mode='lines+markers')
fig.update_layout(title=title)
fig.write_html(save_file)
fig.show()
# Assuming your dataframe is named 'df'
title = "Natural Gas Consumption by Each Sector"
save_file = "NaturalGasConsumption.html"
plot_time_series(df_monthly, title, save_file)
df_monthly
| Month | NG-Residentional-sector | NG-Commercial-sector | NG-Industrial-sector | NG-Industrial-sector_02 | NG-Industrial-sector_01 | NG-Industrial-sector_total | Natural Gas Consumed by the Industrial Sector, Total | NG-Transportation-sector | NG-Transportation-sector_01 | NG-Transportation-sector_total | NG-Electric-Power-sector | NG-consumption-total | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1973-01-01 | 843.900 | 392.315 | 0.000 | Not Available | 0.000 | 0.000 | 810.141 | 77.544 | Not Available | 77.544 | 224.100 | 2348.000 |
| 1 | 1973-02-01 | 747.331 | 394.281 | 0.000 | Not Available | 0.000 | 0.000 | 698.671 | 70.212 | Not Available | 70.212 | 215.505 | 2126.000 |
| 2 | 1973-03-01 | 648.504 | 310.799 | 0.000 | Not Available | 0.000 | 0.000 | 731.583 | 66.546 | Not Available | 66.546 | 257.568 | 2015.000 |
| 3 | 1973-04-01 | 465.867 | 231.943 | 0.000 | Not Available | 0.000 | 0.000 | 807.360 | 60.602 | Not Available | 60.602 | 269.228 | 1835.000 |
| 4 | 1973-05-01 | 326.313 | 174.258 | 0.000 | Not Available | 0.000 | 0.000 | 852.091 | 57.101 | Not Available | 57.101 | 319.237 | 1729.000 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 597 | 2022-10-01 | 242.225 | 223.715 | 172.865 | 110.718 | 575.114 | 685.832 | 858.697 | 87.152 | 4.514 | 91.666 | 949.669 | 2365.973 |
| 598 | 2022-11-01 | 516.416 | 356.137 | 167.934 | 114.577 | 608.257 | 722.834 | 890.768 | 102.143 | 4.368 | 106.512 | 903.112 | 2772.945 |
| 599 | 2022-12-01 | 839.755 | 495.846 | 169.487 | 117.972 | 636.396 | 754.368 | 923.855 | 124.574 | 4.514 | 129.088 | 993.345 | 3381.890 |
| 600 | 2023-01-01 | 800.256 | 475.998 | 173.404 | 122.121 | 644.956 | 767.077 | 940.481 | 121.970 | 4.514 | 126.484 | 967.981 | 3311.199 |
| 601 | 2023-02-01 | 688.462 | 426.106 | 157.124 | 108.897 | 593.850 | 702.747 | 859.871 | 109.013 | 4.077 | 113.090 | 871.925 | 2959.454 |
602 rows × 13 columns
from pmdarima import auto_arima
import plotly.graph_objects as go
for sector_name in list(df_monthly):
if sector_name != 'Month':
try:
fig = go.Figure()
sector_df = {
'Monthly-Time': df_monthly['Month'],
sector_name: df_monthly[sector_name]
}
sector_df = pd.DataFrame.from_dict(sector_df)
df_filter_index = sector_df.set_index('Monthly-Time')
# Prepare the data for modeling
years = df_filter_index.index
energy_consumption = df_filter_index.values.flatten()
# Split the data into training and testing
horizan = -20
train_data = energy_consumption[:horizan]
test_data = energy_consumption[horizan:]
print('Length of Train Data: {}\nLength of Test Data: {}'.format(len(train_data), len(test_data)))
# Fit the auto ARIMA model
model = auto_arima(train_data, seasonal=True)
model.fit(train_data)
# Generate predictions
predictions = model.predict(n_periods=len(test_data))
predictions_ahead_in_future = model.predict(n_periods=80)
# Plot the training data
fig.add_trace(go.Scatter(
x=years[:horizan], y=train_data, mode='lines+markers', name='Training Data'))
# Plot the testing data and predictions
fig.add_trace(go.Scatter(
x=years[horizan:], y=test_data, mode='lines+markers', name='Testing Data'))
fig.add_trace(go.Scatter(
x=years[horizan:], y=predictions, mode='lines+markers', name='Predictions'))
# Plot the predictions for the future
fig.add_trace(go.Scatter(
x=pd.date_range(start=years[horizan], periods=80, freq='MS'),
y=predictions_ahead_in_future, mode='lines+markers', name='80 months Future Predictions'))
# Update the layout
fig.update_layout(title=f'Natural Gas Energy Consumption Forecast Country: Sector {sector_name}',
xaxis_title='Year', yaxis_title='Energy Consumption')
# Show the plot
fig.show()
except:
print('Error Occurred in the Given Sector:', sector_name)
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20 Error Occurred in the Given Sector: NG-Industrial-sector_02 Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20 Error Occurred in the Given Sector: NG-Transportation-sector_01 Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
Length of Train Data: 582 Length of Test Data: 20
# import pandas as pd
# import plotly.graph_objects as go
# from statsmodels.tsa.arima.model import ARIMA
# from dateutil.relativedelta import relativedelta
# import numpy as np
# from pmdarima import auto_arima
# # Iterate over each country and sector
# for country in df['County'].unique():
# for sector in df['Sector'].unique():
# fig = go.Figure()
# # Get the energy consumption data for the current country and sector
# df_filter = df[(df['County'] == country) & (
# df['Sector'] == sector)][['Timestamp', 'Yearly Data']]
# df_filter_index = df_filter.set_index('Timestamp')
# # Prepare the data for modeling
# years = df_filter_index.index
# energy_consumption = df_filter_index.values.flatten()
# # Split the data into training and testing
# # Use all data except the last 5 years for training
# train_data = energy_consumption[:-5]
# test_data = energy_consumption[-5:] # Use the last 5 years for testing
# # Fit the auto ARIMA model
# model = auto_arima(train_data, seasonal=False)
# model.fit(train_data)
# # Generate predictions
# predictions = model.predict(n_periods=len(test_data))
# # Plot the training data
# fig.add_trace(go.Scatter(
# x=years[:-5], y=train_data, mode='lines+markers', name='Training Data'))
# # Plot the predictions
# fig.add_trace(go.Scatter(
# x=years[-5:], y=test_data, mode='lines+markers', name='Testing Data'))
# fig.add_trace(go.Scatter(
# x=years[-5:], y=predictions, mode='lines+markers', name='Predictions'))
# # Update the layout
# fig.update_layout(title=f'Energy Consumption Forecast Country : {country} : Sector {sector} ',
# xaxis_title='Year', yaxis_title='Energy Consumption')
# # Show the plot
# fig.show()